{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "from scipy.sparse import lil_matrix\n",
    "import scipy as scp\n",
    "import time\n",
    "%load_ext Cython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ITEM_NUM = 4318201\n",
    "\n",
    "def get_logs_from_hardisk(path):\n",
    "    f = open(path, 'r')\n",
    "    a = f.read()\n",
    "    dict_name = eval(a)\n",
    "    f.close()\n",
    "    return dict_name\n",
    "\n",
    "\n",
    "# Loaded through the shared helper so the file handle is always closed.\n",
    "# NOTE: the helper eval()s the file content - only load trusted files.\n",
    "# user_times_map is indexed by user id in calculate_matrix to weight each user.\n",
    "user_times_map = get_logs_from_hardisk('usersActivity_map.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%cython\n",
    "\n",
    "import datetime\n",
    "import math\n",
    "\n",
    "cpdef calculate_matrix(mat, list user_logs, dict user_times_map):\n",
    "    \"\"\"Accumulate weighted item co-occurrences of every user into `mat`.\n",
    "\n",
    "    For each `(user, items)` entry of `user_logs` and each pair of positions\n",
    "    `a < b` in `items`, the weight `1 / log(1 + user_times_map[user])` is\n",
    "    added to both `mat[items[a], items[b]]` and `mat[items[b], items[a]]`.\n",
    "    A progress line and the current time are printed every 1000 users.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mat : object supporting `mat[i, j] += value`; it is updated in place.\n",
    "    user_logs : list of `(user, items)` tuples, `items` being a list of ints.\n",
    "    user_times_map : dict mapping each user to a number.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The same `mat` object that was passed in.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError if a user with at least two items is missing from\n",
    "    `user_times_map`; ZeroDivisionError if such a user's value is 0.\n",
    "    \"\"\"\n",
    "    cdef int a, b, n, i1, i2, count\n",
    "    cdef list item_log\n",
    "    cdef tuple u\n",
    "\n",
    "    count = 0\n",
    "    for u in user_logs:\n",
    "        count += 1\n",
    "        if count % 1000 == 0:\n",
    "            print('The %d'%count + ' users are finished.')\n",
    "            print(datetime.datetime.now().strftime('%H:%M:%S'))\n",
    "\n",
    "        item_log = u[1]\n",
    "        n = len(item_log)\n",
    "        if n < 2:\n",
    "            # No item pair exists, so there is nothing to add and the user's\n",
    "            # weight is never looked up.\n",
    "            continue\n",
    "\n",
    "        # The weight depends only on the user: compute it once per user\n",
    "        # instead of once per item pair.\n",
    "        weight = 1/(math.log(1+user_times_map[u[0]]))\n",
    "\n",
    "        # Index-based loops visit every pair a < b without copying a slice of\n",
    "        # item_log on each outer iteration.\n",
    "        for a in range(n - 1):\n",
    "            i1 = item_log[a]\n",
    "            for b in range(a + 1, n):\n",
    "                i2 = item_log[b]\n",
    "                mat[i1, i2] += weight\n",
    "                mat[i2, i1] += weight\n",
    "    return mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 0  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "17:42:50\n",
      "The 2000 users are finished.\n",
      "17:43:20\n",
      "The 3000 users are finished.\n",
      "17:43:52\n",
      "The 4000 users are finished.\n",
      "17:44:22\n",
      "The 5000 users are finished.\n",
      "17:44:59\n",
      "The 6000 users are finished.\n",
      "17:45:46\n",
      "The 7000 users are finished.\n",
      "17:46:36\n",
      "The 8000 users are finished.\n",
      "17:47:37\n",
      "The 9000 users are finished.\n",
      "17:48:41\n",
      "The 10000 users are finished.\n",
      "17:49:35\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 10000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "17:53:02\n",
      "The 2000 users are finished.\n",
      "17:53:50\n",
      "The 3000 users are finished.\n",
      "17:54:37\n",
      "The 4000 users are finished.\n",
      "17:55:29\n",
      "The 5000 users are finished.\n",
      "17:56:14\n",
      "The 6000 users are finished.\n",
      "17:57:02\n",
      "The 7000 users are finished.\n",
      "17:57:52\n",
      "The 8000 users are finished.\n",
      "17:58:46\n",
      "The 9000 users are finished.\n",
      "17:59:42\n",
      "The 10000 users are finished.\n",
      "18:00:31\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 20000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:04:28\n",
      "The 2000 users are finished.\n",
      "18:05:15\n",
      "The 3000 users are finished.\n",
      "18:05:57\n",
      "The 4000 users are finished.\n",
      "18:06:42\n",
      "The 5000 users are finished.\n",
      "18:07:43\n",
      "The 6000 users are finished.\n",
      "18:08:31\n",
      "The 7000 users are finished.\n",
      "18:09:18\n",
      "The 8000 users are finished.\n",
      "18:10:11\n",
      "The 9000 users are finished.\n",
      "18:11:10\n",
      "The 10000 users are finished.\n",
      "18:12:07\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 30000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:16:06\n",
      "The 2000 users are finished.\n",
      "18:16:47\n",
      "The 3000 users are finished.\n",
      "18:17:29\n",
      "The 4000 users are finished.\n",
      "18:18:22\n",
      "The 5000 users are finished.\n",
      "18:19:11\n",
      "The 6000 users are finished.\n",
      "18:20:07\n",
      "The 7000 users are finished.\n",
      "18:20:51\n",
      "The 8000 users are finished.\n",
      "18:21:43\n",
      "The 9000 users are finished.\n",
      "18:22:34\n",
      "The 10000 users are finished.\n",
      "18:23:22\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 40000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:26:30\n",
      "The 2000 users are finished.\n",
      "18:27:04\n",
      "The 3000 users are finished.\n",
      "18:27:45\n",
      "The 4000 users are finished.\n",
      "18:28:23\n",
      "The 5000 users are finished.\n",
      "18:29:07\n",
      "The 6000 users are finished.\n",
      "18:29:53\n",
      "The 7000 users are finished.\n",
      "18:30:35\n",
      "The 8000 users are finished.\n",
      "18:31:27\n",
      "The 9000 users are finished.\n",
      "18:32:06\n",
      "The 10000 users are finished.\n",
      "18:32:55\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 50000  batch is started!\n",
      "--------------------------------\n",
      "The 3000 users are finished.\n",
      "18:37:33\n",
      "The 4000 users are finished.\n",
      "18:38:13\n",
      "The 5000 users are finished.\n",
      "18:38:56\n",
      "The 6000 users are finished.\n",
      "18:39:38\n",
      "The 7000 users are finished.\n",
      "18:40:13\n",
      "The 8000 users are finished.\n",
      "18:40:47\n",
      "The 9000 users are finished.\n",
      "18:41:28\n",
      "The 10000 users are finished.\n",
      "18:42:05\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 60000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:44:52\n",
      "The 2000 users are finished.\n",
      "18:45:25\n",
      "The 3000 users are finished.\n",
      "18:46:00\n",
      "The 4000 users are finished.\n",
      "18:46:32\n",
      "The 5000 users are finished.\n",
      "18:47:01\n",
      "The 6000 users are finished.\n",
      "18:47:34\n",
      "The 7000 users are finished.\n",
      "18:48:04\n",
      "The 8000 users are finished.\n",
      "18:48:42\n",
      "The 9000 users are finished.\n",
      "18:49:16\n",
      "The 10000 users are finished.\n",
      "18:49:50\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 70000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:52:13\n",
      "The 2000 users are finished.\n",
      "18:52:43\n",
      "The 3000 users are finished.\n",
      "18:53:12\n",
      "The 4000 users are finished.\n",
      "18:53:43\n",
      "The 5000 users are finished.\n",
      "18:54:11\n",
      "The 6000 users are finished.\n",
      "18:54:42\n",
      "The 7000 users are finished.\n",
      "18:55:12\n",
      "The 8000 users are finished.\n",
      "18:55:39\n",
      "The 9000 users are finished.\n",
      "18:56:12\n",
      "The 10000 users are finished.\n",
      "18:56:50\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 80000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "18:59:02\n",
      "The 2000 users are finished.\n",
      "18:59:33\n",
      "The 3000 users are finished.\n",
      "19:00:08\n",
      "The 4000 users are finished.\n",
      "19:00:39\n",
      "The 5000 users are finished.\n",
      "19:01:10\n",
      "The 6000 users are finished.\n",
      "19:01:37\n",
      "The 7000 users are finished.\n",
      "19:02:12\n",
      "The 8000 users are finished.\n",
      "19:02:41\n",
      "The 9000 users are finished.\n",
      "19:03:10\n",
      "The 10000 users are finished.\n",
      "19:03:36\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 90000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:05:46\n",
      "The 2000 users are finished.\n",
      "19:06:09\n",
      "The 3000 users are finished.\n",
      "19:06:33\n",
      "The 4000 users are finished.\n",
      "19:06:57\n",
      "The 5000 users are finished.\n",
      "19:07:20\n",
      "The 6000 users are finished.\n",
      "19:07:42\n",
      "The 7000 users are finished.\n",
      "19:08:07\n",
      "The 8000 users are finished.\n",
      "19:08:31\n",
      "The 9000 users are finished.\n",
      "19:08:54\n",
      "The 10000 users are finished.\n",
      "19:09:20\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 100000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:11:13\n",
      "The 2000 users are finished.\n",
      "19:11:36\n",
      "The 3000 users are finished.\n",
      "19:11:57\n",
      "The 4000 users are finished.\n",
      "19:12:16\n",
      "The 5000 users are finished.\n",
      "19:12:35\n",
      "The 6000 users are finished.\n",
      "19:12:57\n",
      "The 7000 users are finished.\n",
      "19:13:17\n",
      "The 8000 users are finished.\n",
      "19:13:39\n",
      "The 9000 users are finished.\n",
      "19:13:57\n",
      "The 10000 users are finished.\n",
      "19:14:18\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 110000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:15:49\n",
      "The 2000 users are finished.\n",
      "19:16:08\n",
      "The 3000 users are finished.\n",
      "19:16:25\n",
      "The 4000 users are finished.\n",
      "19:16:48\n",
      "The 5000 users are finished.\n",
      "19:17:10\n",
      "The 6000 users are finished.\n",
      "19:17:31\n",
      "The 7000 users are finished.\n",
      "19:17:50\n",
      "The 8000 users are finished.\n",
      "19:18:11\n",
      "The 9000 users are finished.\n",
      "19:18:29\n",
      "The 10000 users are finished.\n",
      "19:18:49\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 120000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:20:14\n",
      "The 2000 users are finished.\n",
      "19:20:28\n",
      "The 3000 users are finished.\n",
      "19:20:44\n",
      "The 4000 users are finished.\n",
      "19:20:59\n",
      "The 5000 users are finished.\n",
      "19:21:14\n",
      "The 6000 users are finished.\n",
      "19:21:28\n",
      "The 7000 users are finished.\n",
      "19:21:42\n",
      "The 8000 users are finished.\n",
      "19:21:57\n",
      "The 9000 users are finished.\n",
      "19:22:10\n",
      "The 10000 users are finished.\n",
      "19:22:25\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 130000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:23:36\n",
      "The 2000 users are finished.\n",
      "19:23:50\n",
      "The 3000 users are finished.\n",
      "19:24:01\n",
      "The 4000 users are finished.\n",
      "19:24:15\n",
      "The 5000 users are finished.\n",
      "19:24:31\n",
      "The 6000 users are finished.\n",
      "19:24:44\n",
      "The 7000 users are finished.\n",
      "19:24:56\n",
      "The 8000 users are finished.\n",
      "19:25:08\n",
      "The 9000 users are finished.\n",
      "19:25:20\n",
      "The 10000 users are finished.\n",
      "19:25:31\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 140000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:26:26\n",
      "The 2000 users are finished.\n",
      "19:26:37\n",
      "The 3000 users are finished.\n",
      "19:26:46\n",
      "The 4000 users are finished.\n",
      "19:26:57\n",
      "The 5000 users are finished.\n",
      "19:27:07\n",
      "The 6000 users are finished.\n",
      "19:27:16\n",
      "The 7000 users are finished.\n",
      "19:27:26\n",
      "The 8000 users are finished.\n",
      "19:27:37\n",
      "The 9000 users are finished.\n",
      "19:27:46\n",
      "The 10000 users are finished.\n",
      "19:27:54\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 150000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:28:38\n",
      "The 2000 users are finished.\n",
      "19:28:46\n",
      "The 3000 users are finished.\n",
      "19:28:53\n",
      "The 4000 users are finished.\n",
      "19:29:02\n",
      "The 5000 users are finished.\n",
      "19:29:10\n",
      "The 6000 users are finished.\n",
      "19:29:18\n",
      "The 7000 users are finished.\n",
      "19:29:26\n",
      "The 8000 users are finished.\n",
      "19:29:35\n",
      "The 9000 users are finished.\n",
      "19:29:43\n",
      "The 10000 users are finished.\n",
      "19:29:51\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 160000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:30:32\n",
      "The 2000 users are finished.\n",
      "19:30:38\n",
      "The 3000 users are finished.\n",
      "19:30:44\n",
      "The 4000 users are finished.\n",
      "19:30:50\n",
      "The 5000 users are finished.\n",
      "19:30:54\n",
      "The 6000 users are finished.\n",
      "19:31:00\n",
      "The 7000 users are finished.\n",
      "19:31:05\n",
      "The 8000 users are finished.\n",
      "19:31:09\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 9000 users are finished.\n",
      "19:31:14\n",
      "The 10000 users are finished.\n",
      "19:31:18\n",
      "save successfully\n",
      "--------------------------------\n",
      "The 170000  batch is started!\n",
      "--------------------------------\n",
      "The 1000 users are finished.\n",
      "19:31:46\n",
      "The 2000 users are finished.\n",
      "19:31:50\n",
      "The 3000 users are finished.\n",
      "19:31:53\n",
      "The 4000 users are finished.\n",
      "19:31:56\n",
      "save successfully\n",
      "--------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Number of users handled per saved matrix; one .npz file is written per batch.\n",
    "BATCH_SIZE = 10000\n",
    "\n",
    "# Both files are read with the shared helper (which eval()s their content,\n",
    "# so they must be trusted) so the file handles are always closed.\n",
    "raw_user_logs = get_logs_from_hardisk('full_logs/userlogs_group4.txt')\n",
    "upward_map = get_logs_from_hardisk('upward_map.txt')\n",
    "\n",
    "# Translate every logged item through upward_map into an int matrix index,\n",
    "# producing the list of (user, items) tuples that calculate_matrix expects.\n",
    "user_logs = [(user, [int(upward_map[x]) for x in items])\n",
    "             for user, items in raw_user_logs.items()]\n",
    "del raw_user_logs  # the untranslated logs are no longer needed\n",
    "\n",
    "for i in range(0, len(user_logs), BATCH_SIZE):\n",
    "    print('The %d '%i + ' batch is started!')\n",
    "    print('--------------------------------')\n",
    "    # A fresh matrix per batch: each saved file holds only that batch's sums.\n",
    "    mat = lil_matrix((ITEM_NUM+1, ITEM_NUM+1), dtype=float)\n",
    "    mat = calculate_matrix(mat, user_logs[i: i + BATCH_SIZE], user_times_map)\n",
    "    scp.sparse.save_npz('tmpData/sparse_matrix_%d_batch_group4.npz'%i, mat.tocsr())\n",
    "    print('save successfully')\n",
    "    print('--------------------------------')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
